/*

Copyright (c) 2021, Dominic Szablewski - https://phoboslab.org
SPDX-License-Identifier: MIT


Simple benchmark suite for png, stbi and qoi

Requires libpng, "stb_image.h" and "stb_image_write.h"
Compile with: 
	gcc qoibench.c -std=gnu99 -lpng -O3 -o qoibench 

*/

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <png.h>

#define STB_IMAGE_IMPLEMENTATION
#define STBI_ONLY_PNG
#define STBI_NO_LINEAR
#include "stb_image.h"

#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

#define QOI_IMPLEMENTATION
#include "qoi.h"




// -----------------------------------------------------------------------------
// Cross platform high resolution timer
// From https://gist.github.com/ForeverZer0/0a4f80fc02b96e19380ebb7a3debbee5

#include <stdint.h>
#if defined(__linux)
	#define HAVE_POSIX_TIMER
	#include <time.h>
	#ifdef CLOCK_MONOTONIC
		#define CLOCKID CLOCK_MONOTONIC
	#else
		#define CLOCKID CLOCK_REALTIME
	#endif
#elif defined(__APPLE__)
	#define HAVE_MACH_TIMER
	#include <mach/mach_time.h>
#elif defined(_WIN32)
	#define WIN32_LEAN_AND_MEAN
	#include <windows.h>
#endif

// Return the current value of the platform's high resolution clock in
// nanoseconds. Only differences between two calls are meaningful; the epoch is
// whatever the underlying clock uses (CLOCKID on Linux, mach_absolute_time()
// on macOS, QueryPerformanceCounter() on Windows).
//
// All conversions use integer arithmetic: a double only carries 53 bits of
// mantissa, so building a nanosecond timestamp in floating point silently
// drops the low bits once the clock value gets large.
static uint64_t ns(void) {
#if defined(__APPLE__)
	// The tick -> ns ratio is constant, so it is queried only once.
	static mach_timebase_info_data_t info;
	static int is_init = 0;
	if (!is_init) {
		mach_timebase_info(&info);
		is_init = 1;
	}
	uint64_t ticks = mach_absolute_time();
	return ticks * info.numer / info.denom;
#elif defined(__linux)
	struct timespec spec;
	clock_gettime(CLOCKID, &spec);
	return (uint64_t)spec.tv_sec * 1000000000ULL + (uint64_t)spec.tv_nsec;
#elif defined(_WIN32)
	// The counter frequency is constant, so it is queried only once.
	static LARGE_INTEGER win_frequency;
	static int is_init = 0;
	if (!is_init) {
		QueryPerformanceFrequency(&win_frequency);
		is_init = 1;
	}
	LARGE_INTEGER now;
	QueryPerformanceCounter(&now);
	uint64_t freq = (uint64_t)win_frequency.QuadPart;
	uint64_t ticks = (uint64_t)now.QuadPart;
	// Convert whole seconds and the sub-second remainder separately so the
	// multiplication by 1e9 cannot overflow for realistic counter values.
	return (ticks / freq) * 1000000000ULL + (ticks % freq) * 1000000000ULL / freq;
#else
	#error "ns(): no high resolution timer implemented for this platform"
#endif
}

// Two-level stringification: TOSTRING() expands its argument (e.g. __LINE__)
// before STRINGIFY() turns it into a string literal.
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)

// Print a printf-style message prefixed with the line number of the call site,
// then terminate the program with exit status 1. The first argument must be a
// string literal because it is concatenated with the prefix at compile time.
// Wrapped in do { } while (0) so the macro expands to exactly one statement
// and is safe to use in an unbraced if/else.
#define ERROR(...) \
	do { \
		printf("abort at line " TOSTRING(__LINE__) ": " __VA_ARGS__); \
		printf("\n"); \
		exit(1); \
	} while (0)


// -----------------------------------------------------------------------------
// libpng encode/decode wrappers
// Seriously, who thought this was a good abstraction for an API to read/write
// images?

// In-memory output target handed to libpng_encode_callback() through
// png_set_write_fn().
typedef struct {
	int size;            // number of bytes written to data so far
	int capacity;        // number of bytes allocated for data
	unsigned char *data; // output buffer, allocated with malloc()
} libpng_write_t;

// libpng write callback: append `length` bytes from `data` to the
// libpng_write_t registered with png_set_write_fn().
//
// The buffer is grown with realloc() when the chunk does not fit. A PNG stream
// can be larger than the raw pixels it encodes (tiny or incompressible images),
// so running out of the initial capacity is not an error. The program is only
// aborted when the output no longer fits the int sized bookkeeping fields or
// when memory is exhausted.
void libpng_encode_callback(png_structp png_ptr, png_bytep data, png_size_t length) {
	libpng_write_t *write_data = (libpng_write_t*)png_get_io_ptr(png_ptr);
	size_t used = (size_t)write_data->size;
	size_t capacity = (size_t)write_data->capacity;

	// size and capacity are ints; checking against the remaining range keeps
	// the addition below from overflowing.
	if (length > (size_t)INT_MAX - used) {
		ERROR("PNG write: output too large");
	}

	size_t needed = used + length;
	if (needed > capacity) {
		size_t new_capacity = capacity > 0 ? capacity : 64;
		while (new_capacity < needed) {
			// Double, but never past INT_MAX (needed <= INT_MAX, so this ends)
			new_capacity = new_capacity > (size_t)INT_MAX / 2
				? (size_t)INT_MAX
				: new_capacity * 2;
		}
		unsigned char *grown = realloc(write_data->data, new_capacity);
		if (!grown) {
			ERROR("PNG write: out of memory");
		}
		write_data->data = grown;
		write_data->capacity = (int)new_capacity;
	}

	memcpy(write_data->data + used, data, length);
	write_data->size = (int)needed;
}

void *libpng_encode(void *pixels, int w, int h, int channels, int *out_len) {
	png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
	if (!png) {
		ERROR("png_create_write_struct");
	}

	png_infop info = png_create_info_struct(png);
	if (!info) {
		ERROR("png_create_info_struct");
	}

	if (setjmp(png_jmpbuf(png))) {
		ERROR("png_jmpbuf");
	}

	// Output is 8bit depth, RGBA format.
	png_set_IHDR(
		png,
		info,
		w, h,
		8,
		channels == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGBA,
		PNG_INTERLACE_NONE,
		PNG_COMPRESSION_TYPE_DEFAULT,
		PNG_FILTER_TYPE_DEFAULT
	);

	png_bytep row_pointers[h];
	for(int y = 0; y < h; y++){
		row_pointers[y] = ((unsigned char *)pixels + y * w * channels);
	}

	libpng_write_t write_data = {
		.size = 0,
		.capacity = w * h * channels,
		.data = malloc(w * h * channels)
	};

	png_set_rows(png, info, row_pointers);
	png_set_write_fn(png, &write_data, libpng_encode_callback, NULL);
	png_write_png(png, info, PNG_TRANSFORM_IDENTITY, NULL);

	png_destroy_write_struct(&png, &info);

	*out_len = write_data.size;
	return write_data.data;
}


// In-memory input source handed to png_decode_callback() through
// png_set_read_fn().
typedef struct {
	int pos;             // offset of the next unread byte in data
	int size;            // total number of bytes available in data
	unsigned char *data; // encoded PNG bytes
} libpng_read_t;

// libpng read callback: copy the next `length` bytes of the libpng_read_t
// registered with png_set_read_fn() into `data` and advance the read position.
// Aborts the program if libpng asks for more bytes than are left.
void png_decode_callback(png_structp png, png_bytep data, png_size_t length) {
	libpng_read_t *read_data = (libpng_read_t*)png_get_io_ptr(png);

	// Compare against the bytes that are left instead of computing
	// pos + length, so the check itself cannot wrap around.
	size_t remaining = read_data->pos < read_data->size
		? (size_t)(read_data->size - read_data->pos)
		: 0;
	if (length > remaining) {
		// png_size_t is a size_t; print it through a cast so the format
		// specifier matches on every platform.
		ERROR("PNG read %lu bytes at pos %d (size: %d)", (unsigned long)length, read_data->pos, read_data->size);
	}

	memcpy(data, read_data->data + read_data->pos, length);
	read_data->pos += (int)length;
}

// Warning handler installed by libpng_decode(). It intentionally does nothing,
// so warnings (about sRGB profiles and the like) are dropped.
void png_warning_callback(png_structp png_ptr, png_const_charp warning_msg) {
	(void)png_ptr;
	(void)warning_msg;
}

// Decode an in-memory PNG stream to 8 bit RGBA pixels with libpng.
//
//   data, size   - the encoded PNG bytes
//   out_w, out_h - receive the image dimensions in pixels
//
// Returns a malloc()ed buffer of w * h * 4 bytes; the caller must free() it.
// Every failure terminates the program through ERROR().
void *libpng_decode(void *data, int size, int *out_w, int *out_h) {
	png_structp png = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, png_warning_callback);
	if (!png) {
		ERROR("png_create_read_struct");
	}

	png_infop info = png_create_info_struct(png);
	if (!info) {
		ERROR("png_create_info_struct");
	}

	// libpng reports fatal errors by longjmp()ing back to this point; handle
	// them the same way libpng_encode() does.
	if (setjmp(png_jmpbuf(png))) {
		ERROR("png_jmpbuf");
	}

	libpng_read_t read_data = {
		.pos = 0,
		.size = size,
		.data = data
	};

	png_set_read_fn(png, &read_data, png_decode_callback);
	png_set_sig_bytes(png, 0);
	png_read_info(png, info);

	png_uint_32 w, h;
	int bitDepth, colorType, interlaceType;
	png_get_IHDR(png, info, &w, &h, &bitDepth, &colorType, &interlaceType, NULL, NULL);

	// 16 bit -> 8 bit
	png_set_strip_16(png);

	// 1, 2, 4 bit -> 8 bit
	// NOTE(review): png_set_packing() unpacks samples to one per byte; for
	// low bit depth grayscale the values may additionally need scaling
	// (png_set_expand_gray_1_2_4_to_8) - confirm against the libpng manual.
	if (bitDepth < 8) {
		png_set_packing(png);
	}

	if (colorType & PNG_COLOR_MASK_PALETTE) {
		png_set_expand(png);
	}

	if (!(colorType & PNG_COLOR_MASK_COLOR)) {
		png_set_gray_to_rgb(png);
	}

	// set paletted or RGB images with transparency to full alpha so we get RGBA
	if (png_get_valid(png, info, PNG_INFO_tRNS)) {
		png_set_tRNS_to_alpha(png);
	}

	// make sure every pixel has an alpha value
	if (!(colorType & PNG_COLOR_MASK_ALPHA)) {
		png_set_filler(png, 255, PNG_FILLER_AFTER);
	}

	png_read_update_info(png, info);

	// Compute the buffer size in 64 bit; w * h * 4 overflows 32 bit
	// arithmetic for large images.
	const uint64_t total = (uint64_t)w * (uint64_t)h * 4;
	if (total > (uint64_t)SIZE_MAX) {
		ERROR("PNG image too large");
	}

	// The transforms above are meant to yield 4 bytes per pixel. Refuse to
	// decode if libpng wants to write longer rows than we allocate.
	const size_t stride = (size_t)w * 4;
	if (png_get_rowbytes(png, info) > stride) {
		ERROR("PNG unexpected row size");
	}

	unsigned char *out = malloc((size_t)total);
	// Row pointers live on the heap: a stack array sized by the image height
	// could exhaust the stack for very tall images.
	png_bytep *row_pointers = malloc((size_t)h * sizeof *row_pointers);
	if (!out || !row_pointers) {
		ERROR("PNG decode: out of memory");
	}
	*out_w = (int)w;
	*out_h = (int)h;

	for (png_uint_32 row = 0; row < h; row++) {
		row_pointers[row] = (png_bytep)(out + (size_t)row * stride);
	}

	png_read_image(png, row_pointers);
	png_read_end(png, info);
	png_destroy_read_struct(&png, &info, NULL);
	free(row_pointers);

	return out;
}


// -----------------------------------------------------------------------------
// stb_image encode callback

// Write callback for stbi_write_png_to_func(). `context` points to an int that
// accumulates the total number of encoded bytes; the bytes themselves are
// discarded. Strictly speaking a fair comparison with the other encoders would
// also have to malloc(), memcpy() and free() the data here.
void stbi_write_callback(void *context, void *data, int size) {
	(void)data;
	*(int *)context += size;
}


// -----------------------------------------------------------------------------
// function to load a whole file into memory

// Read the whole file at `path` into memory.
//
//   path     - file to read
//   out_size - receives the file size in bytes
//
// Returns a malloc()ed buffer with the file contents; the caller must free()
// it. An empty file yields a valid buffer and *out_size == 0. Every failure
// (open, seek, allocation, read, file larger than INT_MAX) terminates the
// program through ERROR().
void *fload(const char *path, int *out_size) {
	FILE *fh = fopen(path, "rb");
	if (!fh) {
		ERROR("Can't open file %s", path);
	}

	// ftell() returns a long and -1 on failure; keep the full width until the
	// value has been validated against the int reported to the caller.
	long size = -1;
	if (fseek(fh, 0, SEEK_END) == 0) {
		size = ftell(fh);
	}
	if (size < 0 || size > INT_MAX || fseek(fh, 0, SEEK_SET) != 0) {
		ERROR("Can't determine size of file %s", path);
	}

	// malloc(0) may legitimately return NULL, so always request at least one
	// byte to keep "allocation failed" distinguishable from "empty file".
	void *buffer = malloc(size > 0 ? (size_t)size : 1);
	if (!buffer) {
		ERROR("Malloc for %ld bytes failed", size);
	}

	if (size > 0 && fread(buffer, (size_t)size, 1, fh) != 1) {
		ERROR("Can't read file %s", path);
	}
	fclose(fh);

	*out_size = (int)size;
	return buffer;
}


// -----------------------------------------------------------------------------
// benchmark runner


// Command line options, set by main() and read by the benchmark functions.
int opt_runs = 1;       // number of timed runs per image (first argument)
int opt_nopng = 0;      // --nopng: skip the libpng and stbi benchmarks
int opt_nowarmup = 0;   // --nowarmup: start BENCHMARK_FN at run 1, i.e. without the untimed run
int opt_noverify = 0;   // --noverify: skip the QOI encode/decode roundtrip check
int opt_nodecode = 0;   // --nodecode: skip all decode benchmarks
int opt_noencode = 0;   // --noencode: skip all encode benchmarks
int opt_norecurse = 0;  // --norecurse: don't descend into subdirectories
int opt_onlytotals = 0; // --onlytotals: don't print per-image results

// Index of each benchmarked library in benchmark_result_t.libs and lib_names.
enum {
	LIBPNG,
	STBI,
	QOI,
	BENCH_COUNT /* must be the last element */
};
// Row labels printed by benchmark_print_result(), indexed by the enum above.
static const char *const lib_names[BENCH_COUNT] = {
	// NOTE: pad with spaces so everything lines up properly
	[LIBPNG] =  "libpng: ",
	[STBI]   =  "stbi:   ",
	[QOI]    =  "qoi:    ",
};

// Measurements for a single library. For one image these are the values
// produced by benchmark_image(); in directory/grand totals they are sums over
// all images.
typedef struct {
	uint64_t size;        // encoded size in bytes
	uint64_t encode_time; // average encode time per run, in ns
	uint64_t decode_time; // average decode time per run, in ns
} benchmark_lib_result_t;

// Result for one image, or the running sum over several images.
typedef struct {
	int count;         // number of images accumulated in this result
	uint64_t raw_size; // uncompressed size (w * h * channels) in bytes
	uint64_t px;       // number of pixels (w * h)
	int w;             // image width; only set for single image results
	int h;             // image height; only set for single image results
	benchmark_lib_result_t libs[BENCH_COUNT]; // per-library numbers, indexed by LIBPNG/STBI/QOI
} benchmark_result_t;


// Print a table with one row per library: average decode/encode time in ms,
// throughput in megapixels per second, encoded size in KiB and the encoded
// size as a percentage of the raw size.
//
// `res` holds sums over res.count images (count is 1 for a single image); all
// values are divided by count before printing. `res` is passed by value, so
// the caller's totals are not modified. Rows for libpng and stbi are omitted
// when opt_nopng is set. Nothing is printed when res.count is not positive.
void benchmark_print_result(benchmark_result_t res) {
	// Averaging over zero images would divide by zero
	if (res.count <= 0) {
		return;
	}

	res.px /= res.count;
	res.raw_size /= res.count;

	double px = res.px;
	printf("          decode ms   encode ms   decode mpps   encode mpps   size kb    rate\n");
	for (int i = 0; i < BENCH_COUNT; ++i) {
		if (opt_nopng && (i == LIBPNG || i == STBI)) {
			continue;
		}
		res.libs[i].encode_time /= res.count;
		res.libs[i].decode_time /= res.count;
		res.libs[i].size /= res.count;

		// Times are in ns: /1e6 gives ms, px / (ns / 1000) gives pixels per
		// microsecond, which equals megapixels per second.
		double decode_ns = (double)res.libs[i].decode_time;
		double encode_ns = (double)res.libs[i].encode_time;
		double rate = res.raw_size > 0
			? ((double)res.libs[i].size/(double)res.raw_size) * 100.0
			: 0;

		// uint64_t is not `long` on every platform, so print the size through
		// an explicit unsigned long long cast with a matching specifier.
		printf(
			"%s   %8.1f    %8.1f      %8.2f      %8.2f  %8llu   %4.1f%%\n",
			lib_names[i],
			decode_ns/1000000.0,
			encode_ns/1000000.0,
			(res.libs[i].decode_time > 0 ? px / (decode_ns/1000.0) : 0),
			(res.libs[i].encode_time > 0 ? px / (encode_ns/1000.0) : 0),
			(unsigned long long)(res.libs[i].size/1024),
			rate
		);
	}
	printf("\n");
}

// Run the statements in __VA_ARGS__ RUNS times, measure each run with ns() and
// store the average duration (in ns) in AVG_TIME.
//
// NOWARMUP is the index of the first run: with 0 one additional run (index 0)
// is executed first and not timed, with 1 that warmup run is skipped.
//
// All arguments are parenthesized and RUNS is evaluated only once. The
// internal variables carry a bench_..._ prefix and suffix so they cannot
// collide with names used by the benchmarked statements or with library
// functions such as time().
#define BENCHMARK_FN(NOWARMUP, RUNS, AVG_TIME, ...) \
	do { \
		const int bench_runs_ = (RUNS); \
		uint64_t bench_total_ = 0; \
		for (int bench_i_ = (NOWARMUP); bench_i_ <= bench_runs_; bench_i_++) { \
			uint64_t bench_start_ = ns(); \
			__VA_ARGS__ \
			uint64_t bench_end_ = ns(); \
			if (bench_i_ > 0) { \
				bench_total_ += bench_end_ - bench_start_; \
			} \
		} \
		(AVG_TIME) = bench_runs_ > 0 ? bench_total_ / (uint64_t)bench_runs_ : 0; \
	} while (0)


// Benchmark all enabled libraries on the PNG file at `path`.
//
// The image is loaded once with stb_image (as RGB if the file has exactly 3
// channels, as RGBA otherwise) and encoded once to QOI. Unless opt_noverify is
// set, the QOI data is decoded again and compared with the source pixels.
// Decoders are then timed on the original PNG bytes / the QOI bytes and
// encoders on the raw pixels, honouring opt_nodecode, opt_noencode, opt_nopng,
// opt_nowarmup and opt_runs.
//
// Returns the result for this single image (count == 1). Times are average ns
// per run; sizes are only filled in by the encode benchmarks. Every failure
// terminates the program through ERROR().
benchmark_result_t benchmark_image(const char *path) {
	int encoded_png_size;
	int encoded_qoi_size;
	int w;
	int h;
	int channels;

	// Read the header first to decide between RGB and RGBA
	if(!stbi_info(path, &w, &h, &channels)) {
		ERROR("Error decoding header %s", path);
	}

	if (channels != 3) {
		channels = 4;
	}

	// Load the raw pixels, the encoded PNG and the encoded QOI into memory
	void *pixels = (void *)stbi_load(path, &w, &h, NULL, channels);
	if (!pixels) {
		ERROR("Error decoding %s", path);
	}

	void *encoded_png = fload(path, &encoded_png_size);
	void *encoded_qoi = qoi_encode(pixels, &(qoi_desc){
			.width = w,
			.height = h, 
			.channels = channels,
			.colorspace = QOI_SRGB
		}, &encoded_qoi_size);

	if (!encoded_qoi || !encoded_png) {
		ERROR("Error encoding %s", path);
	}

	// Sizes are computed in 64 bit; w * h * channels can overflow an int
	const uint64_t px_count = (uint64_t)w * (uint64_t)h;
	const uint64_t raw_size = px_count * (uint64_t)channels;

	// Verify QOI Output

	if (!opt_noverify) {
		qoi_desc dc;
		void *pixels_qoi = qoi_decode(encoded_qoi, encoded_qoi_size, &dc, channels);
		// A failed decode must not reach memcmp() as a NULL pointer
		if (!pixels_qoi) {
			ERROR("QOI roundtrip decode failed for %s", path);
		}
		if (
			dc.width != (unsigned int)w ||
			dc.height != (unsigned int)h ||
			memcmp(pixels, pixels_qoi, (size_t)raw_size) != 0
		) {
			ERROR("QOI roundtrip pixel mismatch for %s", path);
		}
		free(pixels_qoi);
	}


	benchmark_result_t res = {0};
	res.count = 1;
	res.raw_size = raw_size;
	res.px = px_count;
	res.w = w;
	res.h = h;


	// Decoding

	if (!opt_nodecode) {
		if (!opt_nopng) {
			BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[LIBPNG].decode_time, {
				int dec_w, dec_h;
				void *dec_p = libpng_decode(encoded_png, encoded_png_size, &dec_w, &dec_h);
				free(dec_p);
			});

			BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[STBI].decode_time, {
				int dec_w, dec_h, dec_channels;
				void *dec_p = stbi_load_from_memory(encoded_png, encoded_png_size, &dec_w, &dec_h, &dec_channels, 4);
				free(dec_p);
			});
		}

		BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[QOI].decode_time, {
			qoi_desc desc;
			void *dec_p = qoi_decode(encoded_qoi, encoded_qoi_size, &desc, 4);
			free(dec_p);
		});
	}


	// Encoding
	if (!opt_noencode) {
		if (!opt_nopng) {
			BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[LIBPNG].encode_time, {
				int enc_size;
				void *enc_p = libpng_encode(pixels, w, h, channels, &enc_size);
				res.libs[LIBPNG].size = enc_size;
				free(enc_p);
			});

			BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[STBI].encode_time, {
				int enc_size = 0;
				stbi_write_png_to_func(stbi_write_callback, &enc_size, w, h, channels, pixels, 0);
				res.libs[STBI].size = enc_size;
			});
		}

		BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[QOI].encode_time, {
			int enc_size;
			void *enc_p = qoi_encode(pixels, &(qoi_desc){
				.width = w,
				.height = h, 
				.channels = channels,
				.colorspace = QOI_SRGB
			}, &enc_size);
			res.libs[QOI].size = enc_size;
			free(enc_p);
		});
	}

	free(pixels);
	free(encoded_png);
	free(encoded_qoi);

	return res;
}

// Add the measurements of a single image (`res`) to the running sum `total`.
static void benchmark_result_add(benchmark_result_t *total, const benchmark_result_t *res) {
	total->count++;
	total->raw_size += res->raw_size;
	total->px += res->px;
	for (int i = 0; i < BENCH_COUNT; ++i) {
		total->libs[i].encode_time += res->libs[i].encode_time;
		total->libs[i].decode_time += res->libs[i].decode_time;
		total->libs[i].size += res->libs[i].size;
	}
}

// Benchmark every "*.png" file in the directory `path`.
//
// Unless opt_norecurse is set, all subdirectories are processed first
// (recursively). Per-image results are printed unless opt_onlytotals is set,
// followed by a total for this directory if it contained any images. Every
// image is also added to `grand_total`. Failures terminate the program through
// ERROR().
void benchmark_directory(const char *path, benchmark_result_t *grand_total) {
	DIR *dir = opendir(path);
	if (!dir) {
		ERROR("Couldn't open directory %s", path);
	}

	struct dirent *file;

	if (!opt_norecurse) {
		while ((file = readdir(dir)) != NULL) {
			// d_type is an enumeration, not a bit mask: testing it with `&`
			// would also match other types that share a bit with DT_DIR.
			if (
				file->d_type == DT_DIR &&
				strcmp(file->d_name, ".") != 0 &&
				strcmp(file->d_name, "..") != 0
			) {
				char subpath[1024];
				int len = snprintf(subpath, sizeof subpath, "%s/%s", path, file->d_name);
				// A truncated path would point at the wrong directory
				if (len < 0 || (size_t)len >= sizeof subpath) {
					ERROR("Path too long: %s/%s", path, file->d_name);
				}
				benchmark_directory(subpath, grand_total);
			}
		}
		rewinddir(dir);
	}

	benchmark_result_t dir_total = {0};

	int has_shown_head = 0;
	while ((file = readdir(dir)) != NULL) {
		// Only look at the last four characters if the name has that many;
		// otherwise the comparison would start before the beginning of d_name.
		size_t name_len = strlen(file->d_name);
		if (name_len < 4 || strcmp(file->d_name + name_len - 4, ".png") != 0) {
			continue;
		}

		// A directory that merely happens to be called "*.png" is not an image
		if (file->d_type == DT_DIR) {
			continue;
		}

		if (!has_shown_head) {
			has_shown_head = 1;
			printf("## Benchmarking %s/*.png -- %d runs\n\n", path, opt_runs);
		}

		// path + '/' + name + terminating NUL
		size_t file_path_size = strlen(path) + name_len + 2;
		char *file_path = malloc(file_path_size);
		if (!file_path) {
			ERROR("Malloc for path %s/%s failed", path, file->d_name);
		}
		snprintf(file_path, file_path_size, "%s/%s", path, file->d_name);

		benchmark_result_t res = benchmark_image(file_path);

		if (!opt_onlytotals) {
			printf("## %s size: %dx%d\n", file_path, res.w, res.h);
			benchmark_print_result(res);
		}

		free(file_path);

		benchmark_result_add(&dir_total, &res);
		benchmark_result_add(grand_total, &res);
	}
	closedir(dir);

	if (dir_total.count > 0) {
		printf("## Total for %s\n", path);
		benchmark_print_result(dir_total);
	}
}

// Entry point: qoibench <iterations> <directory> [options]
//
// Parses the options into the opt_* globals, benchmarks the given directory
// and prints the grand total. Prints the usage text and exits with status 1
// when fewer than two positional arguments are given; unknown options and an
// invalid iteration count abort through ERROR().
int main(int argc, char **argv) {
	if (argc < 3) {
		printf("Usage: qoibench <iterations> <directory> [options]\n");
		printf("Options:\n");
		printf("    --nowarmup ... don't perform a warmup run\n");
		printf("    --nopng ...... don't run png encode/decode\n");
		printf("    --noverify ... don't verify qoi roundtrip\n");
		printf("    --noencode ... don't run encoders\n");
		printf("    --nodecode ... don't run decoders\n");
		printf("    --norecurse .. don't descend into directories\n");
		printf("    --onlytotals . don't print individual image results\n");
		printf("Examples\n");
		printf("    qoibench 10 images/textures/\n");
		printf("    qoibench 1 images/textures/ --nopng --nowarmup\n");
		exit(1);
	}

	for (int i = 3; i < argc; i++) {
		if (strcmp(argv[i], "--nowarmup") == 0) { opt_nowarmup = 1; }
		else if (strcmp(argv[i], "--nopng") == 0) { opt_nopng = 1; }
		else if (strcmp(argv[i], "--noverify") == 0) { opt_noverify = 1; }
		else if (strcmp(argv[i], "--noencode") == 0) { opt_noencode = 1; }
		else if (strcmp(argv[i], "--nodecode") == 0) { opt_nodecode = 1; }
		else if (strcmp(argv[i], "--norecurse") == 0) { opt_norecurse = 1; }
		else if (strcmp(argv[i], "--onlytotals") == 0) { opt_onlytotals = 1; }
		else { ERROR("Unknown option %s", argv[i]); }
	}

	// strtol() instead of atoi(): trailing garbage ("10x") and out of range
	// values are detected instead of being silently accepted.
	errno = 0;
	char *end = NULL;
	long runs = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE || runs <= 0 || runs > INT_MAX) {
		ERROR("Invalid number of runs %s", argv[1]);
	}
	opt_runs = (int)runs;

	benchmark_result_t grand_total = {0};
	benchmark_directory(argv[2], &grand_total);

	if (grand_total.count > 0) {
		printf("# Grand total for %s\n", argv[2]);
		benchmark_print_result(grand_total);
	}
	else {
		printf("No images found in %s\n", argv[2]);
	}

	return 0;
}
